/* SPDX-License-Identifier: GPL-2.0 */
#ifndef SEMINIX_SCHED_H
#define SEMINIX_SCHED_H

#include <ulinux/linux/sched.h>
#include <seminix/linkage.h>
#include <seminix/cache.h>
#include <seminix/rbtree.h>
#include <seminix/list.h>
#include <seminix/llist.h>
#include <seminix/hrtimer.h>
#include <seminix/cpumask.h>
#include <seminix/mm_types.h>
#include <seminix/pid.h>
#include <seminix/plist.h>
#include <seminix/sched/prio.h>
#include <seminix/restart_block.h>
#include <seminix/signal_types.h>

/* Attach to any functions which should be ignored in wchan output. */
#define __sched		__attribute__((__section__(".sched.text")))

/* Linker adds these: start and end of __sched functions */
extern char __sched_text_start[], __sched_text_end[];

/* Used in tsk->state: */
#define TASK_RUNNING			0x0000
#define TASK_INTERRUPTIBLE		0x0001
#define TASK_UNINTERRUPTIBLE	0x0002
#define __TASK_STOPPED			0x0004
/* Used in tsk->exit_state: */
#define EXIT_DEAD			0x0008
#define EXIT_ZOMBIE			0x0010
/* Used in tsk->state again: */
#define TASK_PARKED			0x0020
#define TASK_DEAD			0x0040
#define TASK_WAKEKILL		0x0080
#define TASK_WAKING			0x0100
#define TASK_NOLOAD			0x0200
#define TASK_NEW			0x0400
#define TASK_STATE_MAX		0x0800

/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)

#define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

/* get_task_state(): */
#define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
                     TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
                     EXIT_DEAD | EXIT_ZOMBIE | TASK_PARKED)

#define task_is_stopped(task)		((task->state & __TASK_STOPPED) != 0)

#define task_contributes_to_load(task)	((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
                     (task->state & TASK_NOLOAD) == 0)

/*
 * set_current_state() includes a barrier so that the write of current->state
 * is correctly serialised wrt the caller's subsequent test of whether to
 * actually sleep:
 *
 *   for (;;) {
 *	set_current_state(TASK_UNINTERRUPTIBLE);
 *	if (!need_sleep)
 *		break;
 *
 *	schedule();
 *   }
 *   __set_current_state(TASK_RUNNING);
 *
 * If the caller does not need such serialisation (because, for instance, the
 * condition test and condition change and wakeup are under the same lock) then
 * use __set_current_state().
 *
 * The above is typically ordered against the wakeup, which does:
 *
 *   need_sleep = false;
 *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
 *
 * where wake_up_state() executes a full memory barrier before accessing the
 * task state.
 *
 * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
 *
 * However, with slightly different timing the wakeup TASK_RUNNING store can
 * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
 * a problem either because that will result in one extra go around the loop
 * and our @cond test will save the day.
 *
 * Also see the comments of try_to_wake_up().
 */
/* Plain store of current->state; no ordering vs. a subsequent condition test. */
#define __set_current_state(state_value)				\
    current->state = (state_value)

/* Store current->state with a full memory barrier (see ordering comment above). */
#define set_current_state(state_value)					\
    smp_store_mb(current->state, (state_value))

/*
 * set_special_state() should be used for those states when the blocking task
 * can not use the regular condition based wait-loop. In that case we must
 * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
 * will not collide with our state change.
 */
/*
 * pi_lock is taken so the store cannot race with a wakeup's
 * TASK_RUNNING store; the local "flags" may shadow a caller variable.
 */
#define set_special_state(state_value)					\
    do {								\
        unsigned long flags; /* may shadow */			\
        raw_spin_lock_irqsave(&current->pi_lock, flags);	\
        current->state = (state_value);				\
        raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
    } while (0)

/* Task command name length: */
#define TASK_COMM_LEN			16

extern void scheduler_tick(void);

#define	MAX_SCHEDULE_TIMEOUT		LONG_MAX

extern long schedule_timeout(long timeout);
extern long schedule_timeout_interruptible(long timeout);
extern long schedule_timeout_killable(long timeout);
extern long schedule_timeout_uninterruptible(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);

extern int io_schedule_prepare(void);
extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);

/*
 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 * has a few: load, load_avg, util_avg, freq, and capacity.
 *
 * We define a basic fixed point arithmetic range, and then formalize
 * all these metrics based on that basic range.
 */
#define SCHED_FIXEDPOINT_SHIFT		10
#define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT)

/*
 * Scheduler load weight of an entity.  inv_weight caches an inverse of
 * weight (presumably 2^32/weight — confirm in the sched core) so hot
 * paths can replace division by multiplication.
 */
struct load_weight {
    unsigned long			weight;		/* Entity load weight */
    u32				inv_weight;	/* Cached inverse of weight */
};

/**
 * struct util_est - Estimation utilization of FAIR tasks
 * @enqueued: instantaneous estimated utilization of a task/cpu
 * @ewma:     the Exponential Weighted Moving Average (EWMA)
 *            utilization of a task
 *
 * Support data structure to track an Exponential Weighted Moving Average
 * (EWMA) of a FAIR task's utilization. New samples are added to the moving
 * average each time a task completes an activation. Sample's weight is chosen
 * so that the EWMA will be relatively insensitive to transient changes to the
 * task's workload.
 *
 * The enqueued attribute has a slightly different meaning for tasks and cpus:
 * - task:   the task's util_avg at last task dequeue time
 * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU
 * Thus, the util_est.enqueued of a task represents the contribution on the
 * estimated utilization of the CPU where that task is currently enqueued.
 *
 * Only for tasks we track a moving average of the past instantaneous
 * estimated utilization. This allows to absorb sporadic drops in utilization
 * of an otherwise almost periodic task.
 */
struct util_est {
    unsigned int			enqueued;	/* Instantaneous estimated utilization */
    unsigned int			ewma;		/* Exponential weighted moving average */
/*
 * EWMA sample weight shift; presumably each new sample contributes
 * 1/2^UTIL_EST_WEIGHT_SHIFT = 1/4 of the average — confirm at the
 * update site in sched/fair.
 */
#define UTIL_EST_WEIGHT_SHIFT		2
} __attribute__((__aligned__(sizeof(u64))));

/*
 * The load_avg/util_avg accumulates an infinite geometric series
 * (see __update_load_avg() in kernel/sched/fair.c).
 *
 * [load_avg definition]
 *
 *   load_avg = runnable% * scale_load_down(load)
 *
 * where runnable% is the time ratio that a sched_entity is runnable.
 * For cfs_rq, it is the aggregated load_avg of all runnable and
 * blocked sched_entities.
 *
 * load_avg may also take frequency scaling into account:
 *
 *   load_avg = runnable% * scale_load_down(load) * freq%
 *
 * where freq% is the CPU frequency normalized to the highest frequency.
 *
 * [util_avg definition]
 *
 *   util_avg = running% * SCHED_CAPACITY_SCALE
 *
 * where running% is the time ratio that a sched_entity is running on
 * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
 * and blocked sched_entities.
 *
 * util_avg may also factor frequency scaling and CPU capacity scaling:
 *
 *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
 *
 * where freq% is the same as above, and capacity% is the CPU capacity
 * normalized to the greatest capacity (due to uarch differences, etc).
 *
 * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
 * themselves are in the range of [0, 1]. To do fixed point arithmetics,
 * we therefore scale them to as large a range as necessary. This is for
 * example reflected by util_avg's SCHED_CAPACITY_SCALE.
 *
 * [Overflow issue]
 *
 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
 * with the highest load (=88761), always runnable on a single cfs_rq,
 * and should not overflow as the number already hits PID_MAX_LIMIT.
 *
 * For all other cases (including 32-bit kernels), struct load_weight's
 * weight will overflow first before we do, because:
 *
 *    Max(load_avg) <= Max(load.weight)
 *
 * Then it is the load_weight's responsibility to consider overflow
 * issues.
 */
/* Per-entity load tracking state; see the [load_avg/util_avg] comment above. */
struct sched_avg {
    u64				last_update_time;	/* Timestamp of the last accumulation */
    u64				load_sum;		/* Geometric series sum for load */
    u64				runnable_load_sum;	/* Geometric series sum for runnable load */
    u32				util_sum;		/* Geometric series sum for utilization */
    u32				period_contrib;		/* Contribution carried into the current period */
    unsigned long			load_avg;	/* See [load_avg definition] above */
    unsigned long			runnable_load_avg;
    unsigned long			util_avg;	/* See [util_avg definition] above */
    struct util_est			util_est;	/* Estimated utilization (struct util_est) */
} ____cacheline_aligned;

/* Fair-scheduler (CFS-style) scheduling entity embedded in task_struct. */
struct sched_entity {
    /* For load-balancing: */
    struct load_weight      load;
    unsigned long		    runnable_weight;
    struct rb_node		    run_node;	/* rbtree linkage on the run queue */
    struct list_head		group_node;
    unsigned int			on_rq;	/* Non-zero while queued on a runqueue */

    u64				exec_start;	/* Timestamp when current run started */
    u64				sum_exec_runtime;	/* Total runtime consumed */
    u64				vruntime;	/* Virtual runtime ordering key */
    u64				prev_sum_exec_runtime;	/* Earlier snapshot of sum_exec_runtime */

    u64				nr_migrations;	/* Cross-CPU migration count */

    /*
     * Per entity load average tracking.
     *
     * Put into separate cache line so it does not
     * collide with read-mostly values above.
     */
    struct sched_avg		avg;
};

/* Real-time (FIFO/RR) scheduling entity embedded in task_struct. */
struct sched_rt_entity {
    struct list_head		run_list;	/* Linkage on the RT run list */
    unsigned int			time_slice;	/* Remaining time slice (presumably for SCHED_RR) */
    unsigned short			on_rq;	/* Non-zero while queued */
    unsigned short			on_list;	/* Non-zero while on run_list */

    struct sched_rt_entity		*back;	/* Back-link; usage not visible in this header */
} __randomize_layout;

/* Deadline (SCHED_DEADLINE-style) scheduling entity embedded in task_struct. */
struct sched_dl_entity {
    struct rb_node			rb_node;	/* rbtree linkage ordered by deadline */

    /*
     * Original scheduling parameters. Copied here from sched_attr
     * during sched_setattr(), they will remain the same until
     * the next sched_setattr().
     */
    u64				dl_runtime;	/* Maximum runtime for each instance	*/
    u64				dl_deadline;	/* Relative deadline of each instance	*/
    u64				dl_period;	/* Separation of two instances (period) */
    u64				dl_bw;		/* dl_runtime / dl_period		*/
    u64				dl_density;	/* dl_runtime / dl_deadline		*/

    /*
     * Actual scheduling parameters. Initialized with the values above,
     * they are continuously updated during task execution. Note that
     * the remaining runtime could be < 0 in case we are in overrun.
     */
    i64				runtime;	/* Remaining runtime for this instance	*/
    u64				deadline;	/* Absolute deadline for this instance	*/
    unsigned int			flags;		/* Specifying the scheduler behaviour	*/

    /*
     * Some bool flags:
     *
     * @dl_throttled tells if we exhausted the runtime. If so, the
     * task has to wait for a replenishment to be performed at the
     * next firing of dl_timer.
     *
     * @dl_boosted tells if we are boosted due to PI (priority
     * inheritance). If so we are outside bandwidth enforcement
     * mechanism (but only until we exit the critical section);
     *
     * @dl_yielded tells if task gave up the CPU before consuming
     * all its available runtime during the last job.
     *
     * @dl_non_contending tells if the task is inactive while still
     * contributing to the active utilization. In other words, it
     * indicates if the inactive timer has been armed and its handler
     * has not been executed yet. This flag is useful to avoid race
     * conditions between the inactive timer handler and the wakeup
     * code.
     *
     * @dl_overrun tells if the task asked to be informed about runtime
     * overruns.
     */
    unsigned int			dl_throttled      : 1;
    unsigned int			dl_boosted        : 1;
    unsigned int			dl_yielded        : 1;
    unsigned int			dl_non_contending : 1;
    unsigned int			dl_overrun	  : 1;

    /*
     * Bandwidth enforcement timer. Each -deadline task has its
     * own bandwidth to be enforced, thus we need one timer per task.
     */
    struct hrtimer			dl_timer;

    /*
     * Inactive timer, responsible for decreasing the active utilization
     * at the "0-lag time". When a -deadline task blocks, it contributes
     * to GRUB's active utilization until the "0-lag time", hence a
     * timer is needed to decrease the active utilization at the correct
     * time.
     */
    struct hrtimer inactive_timer;
};

/*
 * Singly-linked node for deferred wakeup queues — presumably chained by
 * wake_q_add() and drained by wake_up_q(); confirm in the sched core.
 */
struct wake_q_node {
    struct wake_q_node *next;
};

/*
 * Main per-task state.  Comments are added below only where the
 * original field had none.
 */
struct task_struct {
    struct thread_info  thread_info;
    /* -1 unrunnable, 0 runnable, >0 stopped: */
    volatile long			state;

    /*
     * This begins the randomizable portion of task_struct. Only
     * scheduling-critical items should be added above here.
     */
    randomized_struct_fields_start

    /* Kernel stack pointer (presumably the base — confirm at allocation site): */
    void				*stack;
    /* Reference count on the task: */
    atomic_t			usage;
    /* Per task flags (PF_*), defined further below: */
    unsigned int			flags;

    struct llist_node		wake_entry;
    int				on_cpu;
    /* Current CPU: */
    unsigned int			cpu;

    /*
     * recent_used_cpu is initially set as the last CPU used by a task
     * that wakes affine another task. Waker/wakee relationships can
     * push tasks around a CPU where each wakeup moves to the next one.
     * Tracking a recently used CPU allows a quick search for a recently
     * used CPU that may be idle.
     */
    int				recent_used_cpu;
    int				wake_cpu;
    int				on_rq;

    /* Effective, static (nice-based) and normal priorities: */
    int				prio;
    int				static_prio;
    int				normal_prio;
    unsigned int	rt_priority;

    const struct sched_class	*sched_class;
    struct sched_entity		    se;
    struct sched_rt_entity		rt;
    struct sched_dl_entity		dl;

    /* Scheduling policy and CPU affinity: */
    unsigned int			policy;
    int				nr_cpus_allowed;
    cpumask_t			cpus_allowed;

    struct list_head		tasks;

    struct plist_node		pushable_tasks;
    struct rb_node			pushable_dl_tasks;

    struct mm_struct		*mm;
    struct mm_struct		*active_mm;

    /* Per-thread vma caching: */
    struct vmacache			vmacache;

    int				exit_state;
    int				exit_code;
    int				exit_signal;
    /* The signal sent when the parent dies: */
    int				pdeath_signal;
    /* JOBCTL_*, siglock protected: */
    unsigned long			jobctl;

    /* Used for emulating ABI behavior of previous Linux versions: */
    unsigned int			personality;

    /* Scheduler bits, serialized by scheduler locks: */
    unsigned			sched_reset_on_fork:1;
    unsigned			sched_contributes_to_load:1;
    unsigned			sched_remote_wakeup:1;

    /* Force alignment to the next boundary: */
    unsigned			:0;

    /* Bit to tell LSMs we're in execve(): */
    unsigned			in_execve:1;
    unsigned			in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
    unsigned			restore_sigmask:1;
#endif

    unsigned long			atomic_flags; /* Flags requiring atomic access. */

    struct restart_block		restart_block;

    pid_t				pid;
    pid_t				tgid;

#ifdef CONFIG_STACKPROTECTOR
    /* Canary value for the -fstack-protector GCC feature: */
    unsigned long			stack_canary;
#endif
    /*
     * Pointers to the (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->real_parent->pid)
     */

    /* Real parent process: */
    struct task_struct __rcu	*real_parent;

    /* Recipient of SIGCHLD, wait4() reports: */
    struct task_struct __rcu	*parent;

    /*
     * Children/sibling form the list of natural children:
     */
    struct list_head		children;
    struct list_head		sibling;
    struct task_struct		*group_leader;

    /* PID/PID hash table linkage. */
    struct pid			*thread_pid;
    struct hlist_node		pid_links[PIDTYPE_MAX];
    struct list_head		thread_group;
    struct list_head		thread_node;

    struct completion		*vfork_done;

    /* CLONE_CHILD_SETTID: */
    int __user			*set_child_tid;

    /* CLONE_CHILD_CLEARTID: */
    int __user			*clear_child_tid;

    /* User, system and guest CPU time (units presumably ns — confirm at update sites): */
    u64				utime;
    u64				stime;
    u64				gtime;

    /* Context switch counts: */
    unsigned long			nvcsw;
    unsigned long			nivcsw;

    /* Monotonic time in nsecs: */
    u64				start_time;

    /* Boot based time in nsecs: */
    u64				real_start_time;

    /* Objective and real subjective task credentials (COW): */
    const struct cred __rcu		*real_cred;

    /* Effective (overridable) subjective task credentials (COW): */
    const struct cred __rcu		*cred;

    /*
     * executable name, excluding path.
     *
     * - normally initialized setup_new_exec()
     * - access it with [gs]et_task_comm()
     * - lock it with task_lock()
     */
    char				comm[TASK_COMM_LEN];

    /* Filesystem information: */
    struct fs_struct		*fs;

    /* Open file information: */
    struct files_struct		*files;

    /* Signal handlers: */
    struct signal_struct		*signal;
    struct sighand_struct		*sighand;
    /* Blocked signal sets: */
    sigset_t			blocked;
    sigset_t			real_blocked;
    /* Restored if set_restore_sigmask() was used: */
    sigset_t			saved_sigmask;
    struct sigpending		pending;
    /* Alternate signal stack (sigaltstack()): */
    unsigned long			sas_ss_sp;
    usize				sas_ss_size;
    unsigned int			sas_ss_flags;

    /* Pending callback_head work items: */
    struct callback_head		*task_works;

    /* Thread group tracking: */
    u32				parent_exec_id;
    u32				self_exec_id;

    /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
    spinlock_t			alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t			pi_lock;

    struct wake_q_node		wake_q;

    /*
     * Time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    u64				timer_slack_ns;
    u64				default_timer_slack_ns;

    /* Presumably a pagefault_disable() nesting count — confirm at users: */
    int				pagefault_disabled;

    /* A live task holds one reference: */
    atomic_t			stack_refcount;

    /*
     * New fields for task_struct should be added above here, so that
     * they are included in the randomized portion of task_struct.
     */
    randomized_struct_fields_end

    /* CPU-specific state of this task: */
    struct thread_struct		thread;
};

/* Return the struct pid attached to @task (NULL once detached; see pid_alive()). */
static inline struct pid *task_pid(struct task_struct *task)
{
    return task->thread_pid;
}

/* A task counts as alive while a struct pid is still attached to it. */
static inline int pid_alive(const struct task_struct *p)
{
    return !!p->thread_pid;
}

/* Numeric PID of @tsk. */
static inline pid_t task_pid_nr(struct task_struct *tsk)
{
    return tsk->pid;
}

/* Numeric thread-group ID of @tsk. */
static inline pid_t task_tgid_nr(struct task_struct *tsk)
{
    return tsk->tgid;
}

pid_t __task_pid_nr(struct task_struct *task, enum pid_type type);

/* Numeric process-group ID of @tsk, resolved via the pid machinery. */
static inline pid_t task_pgrp_nr(struct task_struct *tsk)
{
    return __task_pid_nr(tsk, PIDTYPE_PGID);
}

/* Numeric session ID of @tsk, resolved via the pid machinery. */
static inline pid_t task_session_nr(struct task_struct *tsk)
{
    return __task_pid_nr(tsk, PIDTYPE_SID);
}

/* TGID of @tsk's real parent, or 0 if @tsk's pid is already detached. */
static inline pid_t task_ppid_nr(const struct task_struct *tsk)
{
    pid_t pid = 0;

    /*
     * real_parent is __rcu; hold the RCU read lock across the
     * dereference.  pid_alive() guards against an exiting task.
     */
    rcu_read_lock();
    if (pid_alive(tsk))
        pid = task_tgid_nr(rcu_dereference(tsk->real_parent));
    rcu_read_unlock();

    return pid;
}

#define TASK_REPORT_IDLE	(TASK_REPORT + 1)
#define TASK_REPORT_MAX		(TASK_REPORT_IDLE << 1)

/*
 * Compact index of @tsk's reported state, suitable for
 * task_index_to_char().  TASK_IDLE (uninterruptible + NOLOAD) is
 * reported as its own synthetic TASK_REPORT_IDLE state.
 */
static inline unsigned int task_state_index(struct task_struct *tsk)
{
    unsigned int raw_state = READ_ONCE(tsk->state);
    unsigned int reported;

    BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);

    if (raw_state == TASK_IDLE)
        reported = TASK_REPORT_IDLE;
    else
        reported = (raw_state | tsk->exit_state) & TASK_REPORT;

    /* fls() turns the highest set bit into a small index. */
    return fls(reported);
}

/* Map a task_state_index() value to its one-letter state code. */
static inline char task_index_to_char(unsigned int state)
{
    /* Index order follows fls() of the TASK_REPORT bits:
     * Running, Sleeping, D-sleep, sTopped, X (dead), Zombie, Parked, Idle. */
    static const char state_char[] = "RSDTXZPI";

    /* Keep the string length in lock-step with TASK_REPORT_MAX. */
    BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);

    return state_char[state];
}

/* Convenience: one-letter state code of @tsk. */
static inline char task_state_to_char(struct task_struct *tsk)
{
    return task_index_to_char(task_state_index(tsk));
}

/**
 * is_global_init - check if a task structure is init. Since init
 * is free to have sub-threads we need to check tgid.
 * @tsk: Task structure to be checked.
 *
 * Check if a task structure is the first user space task the kernel created.
 *
 * Return: 1 if the task structure is init. 0 otherwise.
 */
static inline int is_global_init(struct task_struct *tsk)
{
    /* Compare the tgid so every thread of init matches, not just the leader. */
    return task_tgid_nr(tsk) == 1;
}

/*
 * Per process flags
 */
#define PF_IDLE			0x00000002	/* I am an IDLE thread */
#define PF_EXITING		0x00000004	/* Getting shut down */
#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
#define PF_VCPU			0x00000010	/* I'm a virtual CPU */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
#define PF_MCE_PROCESS		0x00000080      /* Process policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* Dumped core */
#define PF_SIGNALED		0x00000400	/* Killed by a signal */
#define PF_MEMALLOC		0x00000800	/* Allocating memory */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
#define PF_USED_ASYNC		0x00004000	/* Used async_schedule*(), used by module init */
#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
#define PF_FROZEN		0x00010000	/* Frozen for system suspend */
#define PF_KSWAPD		0x00020000	/* I am kswapd */
#define PF_MEMALLOC_NOFS	0x00040000	/* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO	0x00080000	/* All allocation requests will inherit GFP_NOIO */
#define PF_LESS_THROTTLE	0x00100000	/* Throttle me less: I clean memory */
#define PF_KTHREAD		0x00200000	/* I am a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
#define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
#define PF_MEMSTALL		0x01000000	/* Stalled due to lack of memory */
#define PF_UMH			0x02000000	/* I'm an Usermodehelper process */
#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
#define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK		0x80000000      /* This thread called freeze_processes() and should not be frozen */

/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in readonly mode for example
 * with tsk_used_math (like during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (same goes for fork, the parent
 * can write to the child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 */
/* Manipulate PF_USED_MATH on a (stopped) child or on current: */
#define clear_stopped_child_used_math(child)	do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child)	do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math()			clear_stopped_child_used_math(current)
#define set_used_math()				set_stopped_child_used_math(current)

/* The comma operator keeps the clear + conditional set a single statement. */
#define conditional_stopped_child_used_math(condition, child) \
    do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)

#define conditional_used_math(condition)	conditional_stopped_child_used_math(condition, current)

/* Propagate current's PF_USED_MATH bit into the child: */
#define copy_to_stopped_child_used_math(child) \
    do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)

/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
#define used_math()				tsk_used_math(current)

/*
 * A per-CPU thread is pinned to exactly one CPU and userspace may not
 * change its affinity (PF_NO_SETAFFINITY).
 */
static inline bool is_percpu_thread(void)
{
    return (current->nr_cpus_allowed == 1) &&
           (current->flags & PF_NO_SETAFFINITY);
}

/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
#define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */

/* Generate task_<func>() to test atomic per-process flag PFA_<name>: */
#define TASK_PFA_TEST(name, func)					\
    static inline bool task_##func(struct task_struct *p)		\
    { return test_bit(PFA_##name, &p->atomic_flags); }

/* Generate task_set_<func>() to set PFA_<name>: */
#define TASK_PFA_SET(name, func)					\
    static inline void task_set_##func(struct task_struct *p)	\
    { set_bit(PFA_##name, &p->atomic_flags); }

/* Generate task_clear_<func>() to clear PFA_<name>: */
#define TASK_PFA_CLEAR(name, func)					\
    static inline void task_clear_##func(struct task_struct *p)	\
    { clear_bit(PFA_##name, &p->atomic_flags); }

TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)

TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)

TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)

TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)

TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)

TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)

TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)

/* Restore only the PF_* bits selected by @flags from @orig_flags. */
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
    current->flags = (current->flags & ~flags) | (orig_flags & flags);
}

extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);

#ifndef cpu_relax_yield
#define cpu_relax_yield() cpu_relax()
#endif

extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);

/**
 * task_nice - return the nice value of a given task.
 * @p: the task in question.
 *
 * Return: The nice value [ -20 ... 0 ... 19 ].
 */
static inline int task_nice(const struct task_struct *p)
{
    /* Invert the nice→prio mapping; PRIO_TO_NICE comes from <seminix/sched/prio.h>. */
    return PRIO_TO_NICE((p)->static_prio);
}

extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
extern struct task_struct *idle_task(int cpu);

/**
 * is_idle_task - is the specified task an idle task?
 * @p: the task in question.
 *
 * Return: 1 if @p is an idle task. 0 otherwise.
 */
/* Idle tasks are tagged with PF_IDLE in their flags word. */
static inline bool is_idle_task(const struct task_struct *p)
{
    return (p->flags & PF_IDLE) != 0;
}

extern struct task_struct *curr_task(int cpu);

void yield(void);

extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];

/* thread_info is embedded as the first member of task_struct (see above). */
static inline struct thread_info *task_thread_info(struct task_struct *task)
{
    return &task->thread_info;
}

extern struct task_struct *find_task_by_pid(pid_t nr);

extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);

extern void kick_process(struct task_struct *tsk);

extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);

/* Rename @tsk's comm; exec=false marks this as a non-exec rename. */
static inline void set_task_comm(struct task_struct *tsk, const char *from)
{
    __set_task_comm(tsk, from, false);
}

extern char *__get_task_comm(char *to, usize len, struct task_struct *tsk);
/*
 * Copy @tsk's comm into @buf.  @buf must be a true array of
 * TASK_COMM_LEN bytes — sizeof() enforces that at compile time.
 */
#define get_task_comm(buf, tsk) ({			\
    BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN);	\
    __get_task_comm(buf, sizeof(buf), tsk);		\
})

void scheduler_ipi(void);
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);

/*
 * Set thread flags in other task's structures.
 * See asm/thread_info.h for TIF_xxxx flags available:
 */
/* Set TIF flag @flag in @tsk's thread_info. */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
    set_ti_thread_flag(task_thread_info(tsk), flag);
}

/* Clear TIF flag @flag in @tsk's thread_info. */
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
    clear_ti_thread_flag(task_thread_info(tsk), flag);
}

/* Set or clear TIF flag @flag in @tsk's thread_info according to @value. */
static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
                      bool value)
{
    update_ti_thread_flag(task_thread_info(tsk), flag, value);
}

/* Set TIF flag @flag in @tsk's thread_info and return its previous state. */
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
    return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
}

/* Clear TIF flag @flag in @tsk's thread_info and return its previous state. */
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
    return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
}

/* Return whether TIF flag @flag is set in @tsk's thread_info. */
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
    return test_ti_thread_flag(task_thread_info(tsk), flag);
}

/* Mark @tsk as needing a reschedule at the next opportunity. */
static inline void set_tsk_need_resched(struct task_struct *tsk)
{
    set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}

/* Clear @tsk's pending reschedule request. */
static inline void clear_tsk_need_resched(struct task_struct *tsk)
{
    clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}

/* Return whether @tsk has a reschedule pending; unlikely() biases the fast path. */
static inline int test_tsk_need_resched(struct task_struct *tsk)
{
    return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}

extern int __cond_resched_lock(spinlock_t *lock);

/*
 * NOTE(review): currently a thin wrapper around __cond_resched_lock();
 * the statement expression keeps the callable-macro shape for callers.
 */
#define cond_resched_lock(lock) ({				\
    __cond_resched_lock(lock);				\
})

/*
 * Does a critical section need to be broken due to another
 * task waiting?: (technically does not depend on CONFIG_PREEMPT,
 * but a general need for low latency)
 */
static inline int spin_needbreak(spinlock_t *lock)
{
    /* Contention alone decides here; no preemption-config gating in this build. */
    return spin_is_contended(lock);
}

/* Whether the current thread has TIF_NEED_RESCHED set. */
static __always_inline bool need_resched(void)
{
    return unlikely(tif_need_resched());
}

/*
 * Wrappers for p->thread_info->cpu access. No-op on UP.
 */
/* CPU @p last ran/was placed on, as cached in task_struct; see set_task_cpu(). */
static inline unsigned int task_cpu(const struct task_struct *p)
{
    return p->cpu;
}

extern void set_task_cpu(struct task_struct *p, unsigned int cpu);

extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

#endif /* !SEMINIX_SCHED_H */
